{ "cells": [ { "cell_type": "markdown", "id": "35b6cf24-4a29-4619-a1a1-8a6620548ce3", "metadata": {}, "source": [ "# DISCLAIMER\n", "The `random_state` is set to 42 because it is a common conventional choice in tutorials and assignments (a \"fun convention\" from *The Hitchhiker's Guide to the Galaxy*). You can pick any integer, but different integers produce different sequences of random numbers, so your results will not be identical across different seeds. Using a fixed `random_state` ensures reproducibility of results." ] }, { "cell_type": "markdown", "id": "2f9483f4-b88e-46b9-90b2-38f3a50d27d0", "metadata": {}, "source": [ "# Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "b1196064-033d-4614-a63e-f85e870ed395", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: imblearn in c:\\users\\carlo\\anaconda3\\lib\\site-packages (0.0)\n", "Requirement already satisfied: imbalanced-learn in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imblearn) (0.12.3)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imbalanced-learn->imblearn) (1.26.4)\n", "Requirement already satisfied: scipy>=1.5.0 in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imbalanced-learn->imblearn) (1.16.3)\n", "Requirement already satisfied: scikit-learn>=1.0.2 in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imbalanced-learn->imblearn) (1.4.2)\n", "Requirement already satisfied: joblib>=1.1.1 in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imbalanced-learn->imblearn) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\carlo\\anaconda3\\lib\\site-packages (from imbalanced-learn->imblearn) (2.2.0)\n" ] } ], "source": [ "#Imports\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import pydotplus\n", "import imblearn\n", "\n", "from sklearn import tree, 
preprocessing\n", "from sklearn.datasets import load_wine\n", "from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, GridSearchCV\n", "from sklearn.tree import DecisionTreeClassifier, plot_tree, export_text\n", "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n", "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "from sklearn.utils import resample\n", "from sklearn.pipeline import Pipeline\n", "from IPython.display import Image\n", "from pandas.plotting import scatter_matrix\n", "\n", "\n", "from imblearn.over_sampling import RandomOverSampler\n", "from imblearn.under_sampling import RandomUnderSampler\n", "!pip install imblearn" ] }, { "cell_type": "markdown", "id": "d1998baf-562f-4430-9e01-29ae901c8d84", "metadata": {}, "source": [ "### Load wine dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "a4873a6c-d26e-4421-877e-bec6d6380d99", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | alcohol | \n", "malic_acid | \n", "ash | \n", "alcalinity_of_ash | \n", "magnesium | \n", "total_phenols | \n", "flavanoids | \n", "nonflavanoid_phenols | \n", "proanthocyanins | \n", "color_intensity | \n", "hue | \n", "od280/od315_of_diluted_wines | \n", "proline | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "14.23 | \n", "1.71 | \n", "2.43 | \n", "15.6 | \n", "127.0 | \n", "2.80 | \n", "3.06 | \n", "0.28 | \n", "2.29 | \n", "5.64 | \n", "1.04 | \n", "3.92 | \n", "1065.0 | \n", "
| 1 | \n", "13.20 | \n", "1.78 | \n", "2.14 | \n", "11.2 | \n", "100.0 | \n", "2.65 | \n", "2.76 | \n", "0.26 | \n", "1.28 | \n", "4.38 | \n", "1.05 | \n", "3.40 | \n", "1050.0 | \n", "
| 2 | \n", "13.16 | \n", "2.36 | \n", "2.67 | \n", "18.6 | \n", "101.0 | \n", "2.80 | \n", "3.24 | \n", "0.30 | \n", "2.81 | \n", "5.68 | \n", "1.03 | \n", "3.17 | \n", "1185.0 | \n", "
| 3 | \n", "14.37 | \n", "1.95 | \n", "2.50 | \n", "16.8 | \n", "113.0 | \n", "3.85 | \n", "3.49 | \n", "0.24 | \n", "2.18 | \n", "7.80 | \n", "0.86 | \n", "3.45 | \n", "1480.0 | \n", "
| 4 | \n", "13.24 | \n", "2.59 | \n", "2.87 | \n", "21.0 | \n", "118.0 | \n", "2.80 | \n", "2.69 | \n", "0.39 | \n", "1.82 | \n", "4.32 | \n", "1.04 | \n", "2.93 | \n", "735.0 | \n", "